Bus Network Graph: Experiments on real data¶
In [ ]:
from helper import *
from network.shortest_paths import *
from elements import Stop, Variant, Path
from queries import StopQuery, VariantQuery, PathQuery
from network.shortest_paths import NetworkAnalysisBetweenness
from network.bus import BusNetwork, BusNetworkDijkstra
from ipyleaflet import Map, GeoJSON
import pandas as pd
import itertools as iters
import random
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from tqdm import tqdm
import timeit
import plotly.io as pio
pio.renderers.default = "jupyterlab"
In [ ]:
net_analysis = NetworkAnalysisBetweenness()
dijkstra = BusNetworkDijkstra()
net = BusNetwork.from_ndjsons(sides_set_type='spatial')
sides_set_type = spatial
100%|██████████| 297/297 [00:10<00:00, 29.66it/s]
In [ ]:
stops = StopQuery.from_ndjson()
vars = VariantQuery.from_ndjson()
Network Construction¶
Degree distribution of Stops¶
In [ ]:
def mod_row(row):
if row['StopType'] in [
'Ô sơn',
'Biển treo',
'Trạm tạm',
'Bến bãi QH chung QH',
'Ga Metro Số 2',
'Bến Bãi QH 568'
]:
row['StopType'] = 'Others'
return row
df_nodes = pd.DataFrame([
stop.to_dict()
for stop in net.nodes.values()
]).apply(mod_row, axis=1)
df_nodes['Degree'] = df_nodes['Routes'].apply(lambda s: len(s.split()))
df_nodes = df_nodes[['StopId', 'StopType', 'Degree']]
df_nodes
Out[ ]:
| StopId | StopType | Degree | |
|---|---|---|---|
| 0 | 35 | Bến xe | 26 |
| 1 | 7276 | Trụ dừng | 21 |
| 2 | 7277 | Trụ dừng | 19 |
| 3 | 7278 | Nhà chờ | 14 |
| 4 | 7265 | Nhà chờ | 5 |
| ... | ... | ... | ... |
| 4392 | 7682 | Others | 1 |
| 4393 | 7683 | Others | 1 |
| 4394 | 7684 | Others | 1 |
| 4395 | 7685 | Others | 1 |
| 4396 | 7686 | Others | 1 |
4397 rows × 3 columns
In [ ]:
bins = [*range(15), 15, 50]
lens = df_nodes.groupby(pd.cut(df_nodes['Degree'], bins=bins)).apply(len)
bins[-1] = '15+'
fig = px.bar(y=bins[1:], x=lens, height=800, width=1400, log_x=True, text_auto=True, color=lens, orientation='h')
fig.update_yaxes(type='category')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(
title="Degree distribution of bus stops",
xaxis_title="Count",
yaxis_title="Degree (# of routes passing)",
# legend_title="Legend Title",
font=dict(
size=18,
# color="RebeccaPurple"
)
)
# fig.update_traces(textposition='outside')
fig.show(renderer='notebook')
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_25672\1233675535.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
In [ ]:
bins = [*range(15), 15, 50]
lens_obj = df_nodes.groupby('StopType').apply(
# # print
# axis='index',
lambda g: (g.groupby(pd.cut(df_nodes['Degree'], bins=bins)).apply(len))
# lambda g: g.value_counts()
).stack().to_dict()
lens_obj
def get_label(label):
if label.right > 15:
return '15+'
return str(label.right)
lens_df = pd.DataFrame([
{
'StopType': stop_type,
'Label': get_label(label),
'Count': count
}
for (stop_type, label), count in lens_obj.items()
])
lens_df
fig = px.bar(
lens_df, x='Count', y='Label', facet_col='StopType', facet_col_wrap=2,
height=1350, width=1800, color='Count',
log_x=True, text_auto=True, orientation='h'
)
fig.update_yaxes(title="Degree (# of routes passing)", type='category')
fig.update_xaxes(title="Count")
fig.update_layout(
title="Degree distribution",
# legend_title="Legend Title",
font=dict(
size=16,
)
)
fig.show(renderer='notebook')
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_31452\1622963036.py:5: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. C:\Users\ADMIN\AppData\Local\Temp\ipykernel_31452\1622963036.py:2: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
StopType Spatial Distribution¶
In [ ]:
def mod_row(row):
if row['StopType'] in [
'Trạm tạm',
'Bến bãi QH chung QH',
'Ga Metro Số 2',
'Bến Bãi QH 568'
]:
row['StopType'] = 'Others'
if row['StopType'] in [
'Ô sơn',
'Biển treo'
]:
row['StopType'] = 'Ô sơn / Biển treo'
return row
type_stops_df = pd.DataFrame(
{
**stops[stop_id].to_dict()
}
for stop_id in stops.ids
)
type_stops_df = type_stops_df[['StopId', 'Code', 'Name', 'StopType', 'Zone', 'Street', 'Lng', 'Lat']].apply(mod_row, axis=1)
type_stops_df
Out[ ]:
| StopId | Code | Name | StopType | Zone | Street | Lng | Lat | |
|---|---|---|---|---|---|---|---|---|
| 0 | 35 | BX 01 | Bến xe buýt Sài Gòn | Bến xe | Quận 1 | Lê Lai | 106.689362 | 10.767676 |
| 1 | 7276 | Q1 190 | Tôn Thất Tùng | Trụ dừng | Quận 1 | Phạm Ngũ Lão | 106.690941 | 10.767670 |
| 2 | 7277 | Q1 191 | Nguyễn Thị Nghĩa | Trụ dừng | Quận 1 | Phạm Ngũ Lão | 106.693699 | 10.768788 |
| 3 | 7278 | Q1 192 | Trường Emst Thalmann | Nhà chờ | Quận 1 | Phạm Ngũ Lão | 106.695587 | 10.769504 |
| 4 | 7265 | Q1 179 | Trạm Trung chuyển trên đường Hàm Nghi | Nhà chờ | Quận 1 | Hàm Nghi | 106.700259 | 10.770911 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4392 | 7682 | SWB1_1 | Ga tàu thuỷ Bạch Đằng | Others | Quận 1 | Tôn Đức Thắng | 106.706901 | 10.775264 |
| 4393 | 7683 | SWB1_4 | Ga tàu thuỷ Bình An | Others | Quận 2 | Đường số 21 | 106.728734 | 10.797349 |
| 4394 | 7684 | SWB1_8 | Ga tàu thuỷ Thanh Đa | Others | Quận Bình Thạnh | Cư xá Thanh Đa | 106.715757 | 10.819385 |
| 4395 | 7685 | SWB1_10 | Ga tàu thuỷ Hiệp Bình Chánh | Others | Quận Thủ Đức | Đường số 10 | 106.731475 | 10.827231 |
| 4396 | 7686 | SWB1_11 | Ga tàu thuỷ Linh Đông | Others | Quận Thủ Đức | Đường số 36 | 106.746254 | 10.835287 |
4397 rows × 8 columns
In [ ]:
fig = px.scatter_geo(
type_stops_df, lat='Lat', lon='Lng', color='StopType', width=1400, height=1000, range_color=[0, 2000000],
hover_data=['StopId', 'Code', 'Name', 'Zone', 'StopType']
)
fig.update_layout(
title='Spatial distribution of StopType',
font=dict(
size=17
),
geo=dict(
scope = 'asia',
resolution = 110,
lonaxis_range= [106.4, 106.9],
lataxis_range= [10.6, 11],
landcolor = 'rgb(240, 240, 240)',
)
)
fig.show(renderer='notebook')
Relationship between Route distance and running time¶
In [ ]:
df_vars = pd.DataFrame([var.to_dict() for var in vars._objs.values()])
df_vars['RouteVarName'] = df_vars['StartStop'] + ' - ' + df_vars['EndStop']
df_vars['Speed'] = df_vars['Distance'] / df_vars['RunningTime'] * 0.06
df_vars['RouteId'] = df_vars['RouteIds'].apply(lambda ids: ids[0])
df_vars['RouteVarId'] = df_vars['RouteIds'].apply(lambda ids: ids[1])
df_vars = df_vars[['RouteId', 'RouteVarId', 'RouteNo', 'RouteVarName', 'Distance', 'RunningTime', 'Speed']]
In [ ]:
# px.histogram(filtered_vars_df['Distance'], nbins=100)
fig = px.scatter(
df_vars, y='Distance', x='RunningTime', marginal_x='box', marginal_y='box', width=1600, height=900,
trendline='ols', trendline_options={"add_constant": False},
hover_data=['RouteVarName', 'RouteId', 'RouteVarId'],
color='Speed', color_continuous_scale=px.colors.sequential.Agsunset, range_color=[5, 40]
)
fig.update_layout(
title="Relationship between Distance and RunningTime",
# legend_title="Legend Title",
font=dict(
size=20,
)
)
fig.show(renderer='notebook')
Running time of construction algorithms¶
In [ ]:
def run_gen(sides_set_type: str):
BusNetwork.from_ndjsons(sides_set_type=sides_set_type)
NO_RUNS = 5
times = {}
for mode in ['default', 'spatial']:
print('Running mode', mode)
times[mode] = [
timeit.timeit("run_gen(sides_set_type=mode)", globals=globals(), number=1)
for _ in range(NO_RUNS)
]
times
Running mode default sides_set_type = default
100%|██████████| 297/297 [00:42<00:00, 6.92it/s]
sides_set_type = default
100%|██████████| 297/297 [00:42<00:00, 7.00it/s]
sides_set_type = default
100%|██████████| 297/297 [00:39<00:00, 7.54it/s]
sides_set_type = default
100%|██████████| 297/297 [00:39<00:00, 7.60it/s]
sides_set_type = default
100%|██████████| 297/297 [00:38<00:00, 7.71it/s]
Running mode spatial sides_set_type = spatial
100%|██████████| 297/297 [00:07<00:00, 38.92it/s]
sides_set_type = spatial
100%|██████████| 297/297 [00:07<00:00, 38.38it/s]
sides_set_type = spatial
100%|██████████| 297/297 [00:07<00:00, 38.87it/s]
sides_set_type = spatial
100%|██████████| 297/297 [00:07<00:00, 39.59it/s]
sides_set_type = spatial
100%|██████████| 297/297 [00:07<00:00, 39.40it/s]
Out[ ]:
{'default': [43.38411490013823,
42.890710399951786,
39.85574440006167,
39.51483920007013,
38.97113410010934],
'spatial': [8.083511600038037,
8.161453299922869,
8.059146600076929,
7.91556299990043,
7.955796599853784]}
In [ ]:
df_times = pd.DataFrame(times, index=['Run #{}'.format(x) for x in range(1, 1 + NO_RUNS)])
NO_ITERS = 297
for mode in ['default', 'spatial']:
df_times[mode + '_it_per_sec'] = NO_ITERS / df_times[mode]
df_times = df_times[['default', 'default_it_per_sec', 'spatial', 'spatial_it_per_sec']]
df_times
Out[ ]:
| default | default_it_per_sec | spatial | spatial_it_per_sec | |
|---|---|---|---|---|
| Run #1 | 43.384115 | 6.845824 | 8.083512 | 36.741458 |
| Run #2 | 42.890710 | 6.924576 | 8.161453 | 36.390578 |
| Run #3 | 39.855744 | 7.451874 | 8.059147 | 36.852537 |
| Run #4 | 39.514839 | 7.516164 | 7.915563 | 37.521020 |
| Run #5 | 38.971134 | 7.621025 | 7.955797 | 37.331271 |
In [ ]:
fin = df_times.sum() / len(df_times)
fin
Out[ ]:
default 40.923309 default_it_per_sec 7.271893 spatial 8.035094 spatial_it_per_sec 36.967373 dtype: float64
In [ ]:
fin['default'] / fin['spatial']
Out[ ]:
5.0930714039938225
Network Analysis¶
Top $k$ most influential Stops¶
In [ ]:
net_analysis.from_net(net=net, dijkstra_engine=dijkstra, alg='tree')
100%|██████████| 4397/4397 [02:26<00:00, 29.94it/s]
In [ ]:
def print_top(k = 10):
print('[Top k = {} most influential Stops]'.format(k))
top_stops_id = net_analysis.top_scores(10)
for stop_id in top_stops_id:
print(stop_id, net_analysis.scores[stop_id])
print_top()
[Top k = 10 most influential Stops] 268 2625614 267 2625614 1239 2614406 1115 2613916 1393 2607219 1152 2604321 270 2311589 271 2310622 174 2294866 510 2261665
In [ ]:
im_stops_df = pd.DataFrame(
{
**stops[stop_id].to_dict(),
"Score": net_analysis.scores[stop_id]
}
for stop_id in net_analysis.top_scores(4397)
)
im_stops_df = im_stops_df[['StopId', 'Code', 'Name', 'StopType', 'Zone', 'Street', 'Lng', 'Lat', 'Score']].sort_values(by='Score')
In [ ]:
fig = px.scatter_geo(
im_stops_df, lat='Lat', lon='Lng', color='Score', width=1400, height=1000, color_continuous_scale='rdpu', range_color=[0, 2000000],
hover_data=['StopId', 'Code', 'Name', 'Zone', 'Score']
)
fig.update_layout(
title='Spatial distribution of Betweenness score',
font=dict(
size=17
),
geo=dict(
scope = 'asia',
resolution = 110,
lonaxis_range= [106.4, 106.9],
lataxis_range= [10.6, 11],
landcolor = 'rgb(240, 240, 240)',
)
)
fig.show(renderer='notebook')
Table of Top $k=10$ Stops¶
In [ ]:
stop_list = [
{
**stops[stop_id].to_dict(),
"Score": net_analysis.scores[stop_id]
}
for stop_id in net_analysis.top_scores()
]
df = pd.DataFrame(stop_list, index=pd.RangeIndex(1, 11))
df['Routes'] = df['Routes'].apply(lambda routes: len(routes.split()))
df['Location'] = df['Street'] + ', ' + df['Zone']
df = df[['StopId', 'Name', 'Location', 'Street', 'Zone', 'StopType', 'Score']]
df
Out[ ]:
| StopId | Name | Location | Street | Zone | StopType | Score | |
|---|---|---|---|---|---|---|---|
| 1 | 268 | Mũi tàu Cộng Hòa | Trường Chinh, Quận Tân Bình | Trường Chinh | Quận Tân Bình | Trụ dừng | 2625614 |
| 2 | 267 | Ngã ba Chế Lan Viên | Trường Chinh, Quận Tân Bình | Trường Chinh | Quận Tân Bình | Nhà chờ | 2625614 |
| 3 | 1239 | Bến xe An Sương | Quốc lộ 22, Huyện Hóc Môn | Quốc lộ 22 | Huyện Hóc Môn | Trụ dừng | 2614406 |
| 4 | 1115 | Bến xe An Sương | Quốc lộ 22, Quận 12 | Quốc lộ 22 | Quận 12 | Trụ dừng | 2613916 |
| 5 | 1393 | Ngã tư Trung Chánh | Quốc lộ 22, Huyện Hóc Môn | Quốc lộ 22 | Huyện Hóc Môn | Nhà chờ | 2607219 |
| 6 | 1152 | Trung tâm Văn hóa Quận 12 | Quốc lộ 22, Quận 12 | Quốc lộ 22 | Quận 12 | Nhà chờ | 2604321 |
| 7 | 270 | UBND Phường 15 | Trường Chinh, Quận Tân Bình | Trường Chinh | Quận Tân Bình | Nhà chờ | 2311589 |
| 8 | 271 | Khu Công Nghiệp Tân Bình | Trường Chinh, Quận Tân Bình | Trường Chinh | Quận Tân Bình | Nhà chờ | 2310622 |
| 9 | 174 | Trạm Dệt Thành Công | Trường Chinh, Quận Tân Phú | Trường Chinh | Quận Tân Phú | Nhà chờ | 2294866 |
| 10 | 510 | Bệnh viện Quận Tân Bình | Hoàng Văn Thụ, Quận Tân Bình | Hoàng Văn Thụ | Quận Tân Bình | Nhà chờ | 2261665 |
# of edge candidates $|E|$ per Stop¶
In [ ]:
def get_df(mode):
return pd.DataFrame(
{
"RouteId": route_id,
"RouteVarId": route_var_id,
"PathLength": path_len,
**pd.Series(per_stop).describe()
}
for (route_id, route_var_id), path_len, per_stop in BusNetwork._analyse_sides_set(sides_set_type=mode)
)
In [ ]:
df_spatial = get_df('spatial')
df_spatial['Type'] = 'spatial'
df_default = get_df('default')
df_default['Type'] = 'default'
sides_set_type = spatial
100%|██████████| 297/297 [00:06<00:00, 49.05it/s]
sides_set_type = default
100%|██████████| 297/297 [00:00<00:00, 603.26it/s]
In [ ]:
df_all = pd.concat([df_default, df_spatial]).reset_index().drop(columns='index')
df_all
Out[ ]:
| RouteId | RouteVarId | PathLength | count | mean | std | min | 25% | 50% | 75% | max | Type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3 | 5 | 483 | 53.0 | 483.0 | 0.000000 | 483.0 | 483.00 | 483.0 | 483.00 | 483.0 | default |
| 1 | 7 | 14 | 222 | 50.0 | 222.0 | 0.000000 | 222.0 | 222.00 | 222.0 | 222.00 | 222.0 | default |
| 2 | 1 | 1 | 249 | 29.0 | 249.0 | 0.000000 | 249.0 | 249.00 | 249.0 | 249.00 | 249.0 | default |
| 3 | 3 | 6 | 327 | 56.0 | 327.0 | 0.000000 | 327.0 | 327.00 | 327.0 | 327.00 | 327.0 | default |
| 4 | 7 | 13 | 235 | 44.0 | 235.0 | 0.000000 | 235.0 | 235.00 | 235.0 | 235.00 | 235.0 | default |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 589 | 313 | 1 | 99 | 2.0 | 2.5 | 0.707107 | 2.0 | 2.25 | 2.5 | 2.75 | 3.0 | spatial |
| 590 | 314 | 2 | 73 | 2.0 | 1.5 | 0.707107 | 1.0 | 1.25 | 1.5 | 1.75 | 2.0 | spatial |
| 591 | 313 | 2 | 30 | 2.0 | 1.5 | 0.707107 | 1.0 | 1.25 | 1.5 | 1.75 | 2.0 | spatial |
| 592 | 337 | 1 | 54 | 5.0 | 6.8 | 3.898718 | 3.0 | 4.00 | 5.0 | 11.00 | 11.0 | spatial |
| 593 | 337 | 2 | 52 | 5.0 | 7.2 | 3.271085 | 4.0 | 4.00 | 7.0 | 10.00 | 11.0 | spatial |
594 rows × 12 columns
In [ ]:
df_default['mean'].describe()
Out[ ]:
count 297.000000 mean 247.265993 std 155.000806 min 6.000000 25% 154.000000 50% 240.000000 75% 333.000000 max 781.000000 Name: mean, dtype: float64
In [ ]:
df_spatial['mean'].describe()
Out[ ]:
count 297.000000 mean 6.320350 std 2.569525 min 1.000000 25% 4.650000 50% 6.275000 75% 7.660000 max 14.040000 Name: mean, dtype: float64
In [ ]:
fig = px.scatter(
df_all, x='PathLength', y='count', marginal_x='box', marginal_y='box', width=1600, height=900,
trendline='ols', trendline_options={"add_constant": False},
hover_data=['RouteId', 'RouteVarId', 'count', 'mean'],
color='Type'
)
fig.show(renderer='notebook')
In [ ]:
fig = px.scatter(
df_all, x='PathLength', y='mean', marginal_x='box', marginal_y='box', log_y=True, width=1600, height=900,
trendline='ols', trendline_options={"add_constant": True},
hover_data=['RouteId', 'RouteVarId', 'count', 'mean'],
color='Type'
)
fig.update_xaxes(title="# of Path sides")
fig.update_yaxes(title="# of candidate edges |E|")
fig.update_layout(
font=dict(
size=20,
)
)
fig.show(renderer='notebook')
Running time between Betweenness Analysis algorithms¶
In [ ]:
def run_analysis(alg: str):
_net_analysis = NetworkAnalysisBetweenness()
_net_analysis.from_net(net=net, dijkstra_engine=None, alg=alg)
NO_RUNS = 5
times = {}
for mode in ['default', 'tree']:
print('Running mode', mode)
times[mode] = [
timeit.timeit("run_analysis(alg=mode)", globals=globals(), number=1)
for _ in range(NO_RUNS)
]
times
Running mode default
100%|██████████| 4397/4397 [09:28<00:00, 7.74it/s] 100%|██████████| 4397/4397 [08:54<00:00, 8.22it/s] 100%|██████████| 4397/4397 [07:40<00:00, 9.55it/s] 100%|██████████| 4397/4397 [07:30<00:00, 9.76it/s] 100%|██████████| 4397/4397 [07:16<00:00, 10.06it/s]
Running mode tree
100%|██████████| 4397/4397 [02:28<00:00, 29.56it/s] 100%|██████████| 4397/4397 [02:21<00:00, 31.05it/s] 100%|██████████| 4397/4397 [02:23<00:00, 30.59it/s] 100%|██████████| 4397/4397 [02:17<00:00, 32.01it/s] 100%|██████████| 4397/4397 [02:17<00:00, 31.92it/s]
Out[ ]:
{'default': [568.2074843998998,
534.7812418001704,
460.3806853001006,
450.74478500010446,
436.9792862001341],
'tree': [148.74347339989617,
141.63267030008137,
143.76318740006536,
137.3843427998945,
137.74046280002221]}
In [ ]:
df_times = pd.DataFrame(times, index=['Run #{}'.format(x) for x in range(1, 1 + NO_RUNS)])
NO_ITERS = 4397
for mode in ['default', 'tree']:
df_times[mode + '_it_per_sec'] = NO_ITERS / df_times[mode]
df_times = df_times[['default', 'default_it_per_sec', 'tree', 'tree_it_per_sec']]
df_times
Out[ ]:
| default | default_it_per_sec | tree | tree_it_per_sec | |
|---|---|---|---|---|
| Run #1 | 568.207484 | 7.738370 | 148.743473 | 29.560961 |
| Run #2 | 534.781242 | 8.222054 | 141.632670 | 31.045097 |
| Run #3 | 460.380685 | 9.550792 | 143.763187 | 30.585020 |
| Run #4 | 450.744785 | 9.754966 | 137.384343 | 32.005103 |
| Run #5 | 436.979286 | 10.062262 | 137.740463 | 31.922355 |
In [ ]:
fin = df_times.sum() / len(df_times)
fin
Out[ ]:
default 490.218697 default_it_per_sec 9.065689 tree 141.852827 tree_it_per_sec 31.023707 dtype: float64
In [ ]:
print('Improvement: {}%'.format((fin['default'] / fin['tree'] * 100).round(2)))
Improvement: 345.58%
Search-space efficient shortest path algorihms¶
In [ ]:
from importlib import reload
# reload(NetworkDijkstraSingleDestination)
In [ ]:
from network.shortest_paths import NetworkDijkstraSingleDestination, NetworkBidirectionalDijkstra
from network.shortest_paths.contraction_hierarchies import (
NetworkContractionHierarchiesRandom,
NetworkContractionHierarchiesPeriodicED,
NetworkContractionHierarchiesLazyED
)
In [ ]:
stops_df = pd.DataFrame(
{
**stops[stop_id].to_dict()
}
for stop_id in stops.ids
)
stops_df = stops_df[['StopId', 'Code', 'Name', 'Zone', 'Street', 'Lng', 'Lat']]
stops_df
Out[ ]:
| StopId | Code | Name | Zone | Street | Lng | Lat | |
|---|---|---|---|---|---|---|---|
| 0 | 35 | BX 01 | Bến xe buýt Sài Gòn | Quận 1 | Lê Lai | 106.689362 | 10.767676 |
| 1 | 7276 | Q1 190 | Tôn Thất Tùng | Quận 1 | Phạm Ngũ Lão | 106.690941 | 10.767670 |
| 2 | 7277 | Q1 191 | Nguyễn Thị Nghĩa | Quận 1 | Phạm Ngũ Lão | 106.693699 | 10.768788 |
| 3 | 7278 | Q1 192 | Trường Emst Thalmann | Quận 1 | Phạm Ngũ Lão | 106.695587 | 10.769504 |
| 4 | 7265 | Q1 179 | Trạm Trung chuyển trên đường Hàm Nghi | Quận 1 | Hàm Nghi | 106.700259 | 10.770911 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 4392 | 7682 | SWB1_1 | Ga tàu thuỷ Bạch Đằng | Quận 1 | Tôn Đức Thắng | 106.706901 | 10.775264 |
| 4393 | 7683 | SWB1_4 | Ga tàu thuỷ Bình An | Quận 2 | Đường số 21 | 106.728734 | 10.797349 |
| 4394 | 7684 | SWB1_8 | Ga tàu thuỷ Thanh Đa | Quận Bình Thạnh | Cư xá Thanh Đa | 106.715757 | 10.819385 |
| 4395 | 7685 | SWB1_10 | Ga tàu thuỷ Hiệp Bình Chánh | Quận Thủ Đức | Đường số 10 | 106.731475 | 10.827231 |
| 4396 | 7686 | SWB1_11 | Ga tàu thuỷ Linh Đông | Quận Thủ Đức | Đường số 36 | 106.746254 | 10.835287 |
4397 rows × 7 columns
In [ ]:
fig = px.scatter_geo(
stops_df, lat='Lat', lon='Lng', color='Zone', width=1400, height=1000,
hover_data=['StopId', 'Code', 'Name', 'Zone']
)
fig.update_layout(
title='Spatial distribution of StopType',
font=dict(
size=17
),
geo=dict(
scope = 'asia',
resolution = 110,
lonaxis_range= [106.4, 106.9],
lataxis_range= [10.6, 11],
landcolor = 'rgb(240, 240, 240)',
)
)
fig.show(renderer='notebook')
In [ ]:
dijkstra_1 = NetworkDijkstraSingleDestination.from_net(net=net)
dijkstra_2 = NetworkBidirectionalDijkstra.from_net(net=net)
ch_0 = NetworkContractionHierarchiesRandom.from_net(net=net)
ch_1 = NetworkContractionHierarchiesPeriodicED.from_net(net=net)
ch_2 = NetworkContractionHierarchiesLazyED.from_net(net=net)
100%|██████████| 4397/4397 [00:37<00:00, 118.49it/s] 100%|██████████| 4397/4397 [00:41<00:00, 106.79it/s] 100%|██████████| 4397/4397 [00:00<00:00, 4936.87it/s] 100%|██████████| 4397/4397 [00:03<00:00, 1245.85it/s]
In [ ]:
NODES_AT_LEVEL = [
[2045, 455, 2342, 271],
[3223, 117, 4263, 521],
[962, 3901, 7167, 7213]
]
In [ ]:
pd.DataFrame(
{
name: {
'Shortcuts': engine.no_shortcuts,
'Density': engine.no_shortcuts / len(net.nodes)
}
for name, (engine) in zip(names[2:], [ch_0, ch_1, ch_2])
}
).round(2)
Out[ ]:
| CH (random node order) | CH (periodic ED update) | CH (lazy ED update) | |
|---|---|---|---|
| Shortcuts | 54210.00 | 13567.00 | 7447.00 |
| Density | 12.33 | 3.09 | 1.69 |
In [ ]:
def analyse_pair(src: int, dest: int):
dist_1, _ = dijkstra_1.path(src, dest)
dist_2, _ = dijkstra_2.path(src, dest)
dist_3, _ = ch_0.path(src, dest)
dist_4, _ = ch_1.path(src, dest)
dist_5, _ = ch_2.path(src, dest)
return [
(dist_1, dijkstra_1),
(dist_2, dijkstra_2),
(dist_3, ch_0),
(dist_4, ch_1),
(dist_5, ch_2)
]
names = [
'Dijkstra',
'Bidirectional Dijkstra',
'CH (random node order)',
'CH (periodic ED update)',
'CH (lazy ED update)',
]
def analyse_pair_length(src: int, dest: int):
analysis = analyse_pair(src, dest)
dist = analysis[0][0]
return {
'Src': src,
'Dest': dest,
'Dist': dist,
**{
# engine.__class__.__name__: len(engine.search_space)
name: len(engine.search_space)
for name, (d, engine) in zip(names, analysis)
}
}
analyse_pair_length(2045, 455)
Out[ ]:
{'Src': 2045,
'Dest': 455,
'Dist': 33.23761501573184,
'Dijkstra': 589,
'Bidirectional Dijkstra': 316,
'CH (random node order)': 398,
'CH (periodic ED update)': 165,
'CH (lazy ED update)': 70}
In [ ]:
df = pd.DataFrame([
dict({
'Src': x,
'Dest': y,
**analyse_pair_length(x, y)
})
for x, y in tqdm(random.sample(list(iters.combinations(stops.ids, 2)), k=3000))
# for nodes_lvl in NODES_AT_LEVEL
# for (x, y) in random.sample(
# list(iters.combinations(nodes_lvl, 2)),
# k=2
# )
]).sort_values(by='Dist').reset_index().drop(columns=['index'])
df
Out[ ]:
| Src | Dest | Dist | Dijkstra | Bidirectional Dijkstra | CH (random node order) | CH (periodic ED update) | CH (lazy ED update) | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2150 | 3020 | 1.027989 | 5 | 5 | 24 | 7 | 10 |
| 1 | 2584 | 2595 | 1.877346 | 9 | 8 | 71 | 32 | 23 |
| 2 | 1298 | 2712 | 2.301905 | 5 | 5 | 72 | 5 | 6 |
| 3 | 4294 | 2717 | 3.095989 | 5 | 7 | 138 | 10 | 30 |
| 4 | 7265 | 120 | 3.166735 | 43 | 44 | 264 | 44 | 37 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2995 | 1094 | 7511 | inf | 4361 | 4364 | 428 | 150 | 88 |
| 2996 | 3569 | 7495 | inf | 4361 | 4365 | 207 | 117 | 30 |
| 2997 | 859 | 7489 | inf | 4361 | 4366 | 362 | 66 | 4 |
| 2998 | 2558 | 7492 | inf | 4361 | 4365 | 272 | 121 | 64 |
| 2999 | 1170 | 7489 | inf | 4361 | 4366 | 59 | 98 | 55 |
3000 rows × 8 columns
In [ ]:
df = df[df['Dist'] != float('inf')]
In [ ]:
df.describe().drop(columns=['Src', 'Dest']).round(2)
Out[ ]:
| Dist | Dijkstra | Bidirectional Dijkstra | CH (random node order) | CH (periodic ED update) | CH (lazy ED update) | |
|---|---|---|---|---|---|---|
| count | 2946.00 | 2946.00 | 2946.00 | 2946.00 | 2946.00 | 2946.00 |
| mean | 72.26 | 2481.48 | 611.56 | 278.29 | 125.60 | 68.76 |
| std | 43.65 | 1233.94 | 380.82 | 93.87 | 55.22 | 20.24 |
| min | 1.03 | 5.00 | 5.00 | 9.00 | 5.00 | 6.00 |
| 25% | 42.08 | 1470.50 | 290.25 | 214.00 | 86.00 | 56.00 |
| 50% | 61.21 | 2642.50 | 570.50 | 296.00 | 118.00 | 71.00 |
| 75% | 92.35 | 3546.00 | 887.00 | 349.00 | 155.00 | 84.00 |
| max | 310.85 | 4361.00 | 1669.00 | 490.00 | 346.00 | 126.00 |
In [ ]:
analyse_pair(35, 345)
Out[ ]:
[(45.836360352544396, <network.shortest_paths.dijkstra.NetworkDijkstraSingleDestination at 0x1a2f4355490>), (45.83636035254439, <network.shortest_paths.bidirectional_dijkstra.NetworkBidirectionalDijkstra at 0x1a2f2d52f90>), (45.83636035254439, <network.shortest_paths.contraction_hierarchies.random.NetworkContractionHierarchiesRandom at 0x1a2f37aba90>), (45.836360352544396, <network.shortest_paths.contraction_hierarchies.periodic_ED.NetworkContractionHierarchiesPeriodicED at 0x1a2f3793bd0>), (45.83636035254439, <network.shortest_paths.contraction_hierarchies.lazy_ED.NetworkContractionHierarchiesLazyED at 0x1a2f68cd610>)]
In [ ]:
fig = ff.create_distplot(
[df['Dist']],
group_labels = ['Dist'],
bin_size = 5,
show_rug = False,
# width=1400,
# height=1000,
# trendline="lowess",
# trendline_options=dict(frac=0.1),
# color_discrete_sequence=px.colors.qualitative.T10[:],
)
fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Count")
fig.update_layout(
font=dict(
size=18,
),
title_text="Actual Distance distribution of generated pairs"
)
fig.show(renderer='notebook')
In [ ]:
fig = px.scatter(
df,
width=1400,
height=700,
x = 'Dist',
y = [
'Dijkstra',
'Bidirectional Dijkstra'
],
opacity=0.10,
trendline="lowess",
trendline_options=dict(frac=0.1),
color_discrete_sequence=px.colors.qualitative.T10[:],
marginal_y = "box"
)
fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")
fig.update_layout(
font=dict(
size=18,
),
title="Performance comparison: Dijkstra vs. Bidirectional Dijkstra"
)
fig.show(renderer='notebook')
In [ ]:
fig = px.scatter(
df,
width=1400,
height=700,
x = 'Dist',
y = [
'CH (random node order)',
'CH (periodic ED update)',
'CH (lazy ED update)'
],
opacity=0.10,
trendline="lowess",
trendline_options=dict(frac=0.1),
color_discrete_sequence=px.colors.qualitative.T10[2:],
marginal_y="box"
)
fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")
fig.update_layout(
font=dict(
size=18,
),
title="Performance comparison: Contraction Hierarchies variants"
)
fig.show(renderer='notebook')
In [ ]:
fig = px.scatter(
df,
width=1400,
height=700,
x = df['Dist'],
y = [df[col] for col in [
'Bidirectional Dijkstra',
'CH (random node order)',
'CH (lazy ED update)'
]],
log_y = True,
opacity=0.10,
trendline="lowess",
trendline_options=dict(frac=0.1),
color_discrete_sequence=[
px.colors.qualitative.T10[x]
for x in [1, 2, 4]
],
marginal_y = "box"
)
fig.update_xaxes(title="Actual Distance")
fig.update_yaxes(title="Search space size")
fig.update_layout(
font=dict(
size=18,
),
title="Performance comparison: Bidirectional Dijkstra vs. Contraction Hierarchies"
)
fig.show(renderer='notebook')